

#' Match first-year decedents to survivors on predicted mortality
#'
#' Keeps the index-month rows (`month_from_index == 0`) of `data_probs`, runs
#' 1:1 nearest-neighbour matching with replacement on `prob_for_report`
#' (exact on `CNRS_topo_main_groups`, treatment = `died_first_year`), and
#' returns one row per retained treated/control pair.
#'
#' @param data_probs Data with at least `id_var`, `month_from_index`,
#'   `died_first_year`, `prob_for_report`, `CNRS_topo_main_groups`,
#'   `DMG_date_of_death_XX` and `full_month_to_death`.
#' @param max_rank Within each control, keep only treated units whose rank of
#'   `abs(delta)` is at most this value (default 10, the former constant).
#' @param caliper Keep only pairs with `abs(delta)` strictly below this value
#'   (default 0.05, the former constant).
#' @return A data frame grouped by `control`: treated-side columns carry the
#'   suffix `.x`, control-side columns `.y`, plus `control`, `delta`
#'   (treated minus control `prob_for_report`), `n` (treated units sharing
#'   the control before filtering) and `rank`.
do_match <- function(data_probs, max_rank = 10, caliper = 0.05) {

  dt_for_match <- data_probs %>%
    data.frame() %>%
    filter(month_from_index == 0) %>%
    select(-c("DMG_date_of_death_XX", "full_month_to_death")) %>%
    mutate(group = as.factor(ifelse(died_first_year == FALSE, "Survivor", "Decedent")),
           CNRS_topo_main_groups = factor(CNRS_topo_main_groups))

  # matchit() reports matches by row name, so make row names the unit ids.
  rownames(dt_for_match) <- dt_for_match$id_var

  match_obj <- MatchIt::matchit(died_first_year ~ prob_for_report,
                                data = dt_for_match,
                                method = "nearest",
                                replace = TRUE,
                                ratio = 1,
                                distance = "mahalanobis",
                                exact = c("CNRS_topo_main_groups"))

  # Translate row names back to ids by lookup instead of parsing them as
  # numbers: this keeps the original type of id_var (so later joins see the
  # same type on both sides) and also works for non-numeric ids.
  # Unmatched treated units have NA in the match matrix and get control = NA.
  match_matrix <- match_obj$match.matrix
  row_ids <- rownames(dt_for_match)
  pairs <- data.frame(
    treat   = dt_for_match$id_var[match(rownames(match_matrix), row_ids)],
    control = dt_for_match$id_var[match(match_matrix[, 1], row_ids)],
    stringsAsFactors = FALSE
  )

  # The inner join already restricts to treated units, so no pre-filter is
  # needed. `!!` pins the thresholds to the function arguments even if the
  # data happens to contain columns with the same names.
  dt_pairs <- dt_for_match %>%
    inner_join(pairs, by = c("id_var" = "treat")) %>%
    left_join(dt_for_match,
              by = c("control" = "id_var")) %>%
    mutate(delta = prob_for_report.x - prob_for_report.y) %>%
    group_by(control) %>%
    mutate(n = n(),
           rank = rank(abs(delta))) %>%
    arrange(control, rank) %>%
    filter(rank <= !!max_rank) %>%
    filter(abs(delta) < !!caliper)

  dt_pairs
}


#' Write descriptive outputs for a set of matched pairs
#'
#' Called for its side effects; writes three files to the working directory:
#' * `matching_histogram_delta.pdf` - bar chart of within-pair `delta`,
#'   binned in steps of 0.01 over [-0.05, 0.05];
#' * `matching_describe.tex` - per cancer type, total and matched numbers of
#'   unique decedents and survivors, with a "Total" row;
#' * `matching_control_n.tex` - how often controls are reused, with a final
#'   row holding the column sums.
#'
#' @param dt_pairs Matched pairs with at least `id_var`, `control`, `delta`,
#'   `prob_for_report.x`, `CNRS_topo_main_groups.x` and
#'   `CNRS_topo_main_groups.y`. Defaults to the object `match_res`, which
#'   must then exist in an enclosing environment.
#' @param data_probs The data the matching was run on; its
#'   `month_from_index == 0` rows provide the total counts.
#' @return The value of the final `write()` call.
do_match_describe <- function(dt_pairs = match_res,
                              data_probs) {

  dt_for_match <- data_probs %>%
    data.frame() %>%
    filter(month_from_index == 0) %>%
    select(-c("DMG_date_of_death_XX", "full_month_to_death")) %>%
    mutate(group = as.factor(ifelse(died_first_year == FALSE, "Survivor", "Decedent")),
           CNRS_topo_main_groups = factor(CNRS_topo_main_groups))

  # Build the plot first and hand it to ggsave() explicitly: adding ggsave()
  # to a plot with `+` is an error in current ggplot2 and, where it worked,
  # depended on last_plot() and on partial argument names (w, h).
  delta_plot <- dt_pairs %>%
    ungroup() %>%
    mutate(delta = cut(delta,
                       breaks = seq(-0.05, 0.05, 0.01),
                       include.lowest = TRUE)) %>%
    ggplot(aes(x = delta)) +
    geom_bar() +
    labs(x = "Within Match Difference in Predicted Mortality",
         y = "Number of Matches") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  ggsave("matching_histogram_delta.pdf", plot = delta_plot, width = 10, height = 7)

  # Unique matched decedents / survivors per cancer type.
  matched_n <- dt_pairs %>%
    ungroup() %>%
    group_by(CNRS_topo_main_groups.x) %>%
    summarise(`Number of Unique Matched Decedent` = length(unique(id_var)),
              `Number of Unique Matched Survivor` = length(unique(control)))

  # Unique decedents / survivors per cancer type in the full matching sample.
  total_n <- dt_for_match %>%
    group_by(CNRS_topo_main_groups, group) %>%
    summarise(N = length(unique(id_var))) %>%
    ungroup() %>%
    pivot_wider(id_cols = "CNRS_topo_main_groups",
                names_from = "group",
                values_from = "N")

  # Join by cancer type rather than binding columns by row position, so a
  # cancer type without any retained match yields a zero count instead of an
  # error. Columns are then picked by name, not by position.
  final_n_type <- total_n %>%
    left_join(matched_n,
              by = c("CNRS_topo_main_groups" = "CNRS_topo_main_groups.x")) %>%
    mutate(`Number of Unique Matched Decedent` =
             coalesce(`Number of Unique Matched Decedent`, 0L),
           `Number of Unique Matched Survivor` =
             coalesce(`Number of Unique Matched Survivor`, 0L)) %>%
    rename(`Cancer Type` = CNRS_topo_main_groups,
           `Total Number of Decedent` = Decedent,
           `Total Number of Survivor` = Survivor) %>%
    select(`Cancer Type`,
           `Total Number of Decedent`,
           `Number of Unique Matched Decedent`,
           `Total Number of Survivor`,
           `Number of Unique Matched Survivor`)
  # The former trailing `arrange(5)` sorted by the constant 5 and therefore
  # never reordered anything; rows stay in cancer-type order as before.

  # Append a "Total" row holding the column sums of the count columns.
  final_n_type %>%
    bind_rows(tibble("Cancer Type" = "Total") %>%
                bind_cols(final_n_type %>%
                            select(-1) %>%
                            purrr::map_df(~c(.x, sum(.x, na.rm = TRUE))) %>%
                            tail(1))) %>%
    kable("latex", booktabs = TRUE, align = c("l", rep("r", ncol(.) - 1))) %>%
    kable_styling(full_width = FALSE) %>%
    row_spec(0, align = "c") %>%
    write("matching_describe.tex")

  # Control reuse: for each number of repeats, how many controls were used
  # that often, the mean treated-side probability, and the implied number of
  # treated units. The last row holds the sum of every column.
  dt_pairs %>%
    group_by(control, CNRS_topo_main_groups.y) %>%
    summarise(phat    = mean(prob_for_report.x),
              repeats = n()) %>%
    arrange(-repeats) %>%
    group_by(repeats) %>%
    summarise(n_of_controls = n(),
              phat = mean(phat)) %>%
    mutate(n_of_treat = n_of_controls * repeats) %>%
    purrr::map_df(~c(.x, sum(.x, na.rm = TRUE))) %>%
    kable("latex", booktabs = TRUE, align = c("l", rep("r", ncol(.) - 1))) %>%
    kable_styling(full_width = FALSE) %>%
    row_spec(0, align = "c") %>%
    write("matching_control_n.tex")

}

          
#' Add a `days_lived` column capped at 30
#'
#' @param data Data with `full_month_to_death`, `DMG_date_of_death_XX` and
#'   `S_index_date_XX`.
#' @return `data` with an added (or overwritten) numeric column `days_lived`.
add_days_lived <- function(data) {
  mutate(
    data,
    # Rows with full_month_to_death == 0 get the day difference between
    # DMG_date_of_death_XX and S_index_date_XX plus one; other rows get 30.
    days_lived = ifelse(
      full_month_to_death == 0,
      as.numeric(difftime(DMG_date_of_death_XX,
                          S_index_date_XX,
                          units = "days")) + 1,
      30
    ),
    # A missing full_month_to_death counts as 30 days.
    days_lived = if_else(is.na(full_month_to_death), 30, days_lived),
    # Never report more than 30 days.
    days_lived = if_else(days_lived > 30, 30, days_lived)
  )
}

#' Cost per 30 days lived for matched decedents and survivors
#'
#' Expands every matched pair to the decedent's rows in `probs`, attaches the
#' control's rows for the same `month_from_index`, joins costs for both
#' sides, and returns total cost divided by total `days_lived`, times 30.
#'
#' @param pairs Matched pairs with columns `id_var` and `control`. Defaults
#'   to the object `dt_pairs` in an enclosing environment.
#' @param probs Data with `id_var`, `month_from_index`,
#'   `CNRS_topo_main_groups` and the columns `add_days_lived()` needs.
#'   Defaults to `build_dyn_new()`.
#' @param data_filterd Cost data with `id_var`, `months_after_dx_0` and
#'   `actual_cost`. Defaults to the object `dt_mc`.
#' @param name Label stored in the `Category` column.
#' @return A one-row data frame with columns `Category`, `Decedent`,
#'   `Survivor` and `Difference` (Decedent minus Survivor).
calc_table_2 <- function(pairs = dt_pairs,
                         probs =  build_dyn_new() ,
                         data_filterd = dt_mc,
                         name = "Total") {

  # Computed once and reused for both sides of the pair (it used to be
  # rebuilt for each join).
  monthly_days <- probs %>%
    add_days_lived() %>%
    select(id_var, month_from_index, CNRS_topo_main_groups, days_lived)

  pairs %>%
    ungroup() %>%
    select(id_var, control) %>%
    ## all months of decedent:
    left_join(monthly_days,
              by = "id_var") %>%
    ## the control's row for the same month:
    left_join(monthly_days,
              by = c("control" = "id_var", "month_from_index" = "month_from_index")) %>%
    ## costs: the first join's columns end up with suffix .x (decedent),
    ## the second join's with suffix .y (control)
    left_join(data_filterd,
              by = c("id_var" = "id_var", "month_from_index" = "months_after_dx_0")) %>%
    left_join(data_filterd,
              by = c("control" = "id_var", "month_from_index" = "months_after_dx_0")) %>%
    ## months without a cost record count as zero cost
    mutate(actual_cost.x = if_else(is.na(actual_cost.x), 0, actual_cost.x),
           actual_cost.y = if_else(is.na(actual_cost.y), 0, actual_cost.y)) %>%
    # NOTE(review): the days_lived sums have no na.rm, so one missing
    # days_lived makes the whole figure NA - confirm this is intended.
    summarise(Decedent = sum(actual_cost.x, na.rm = TRUE) / sum(days_lived.x) * 30,
              Survivor = sum(actual_cost.y, na.rm = TRUE) / sum(days_lived.y) * 30) %>%
    mutate(Difference = Decedent - Survivor,
           Category = name) %>%
    select(Category, Decedent, Survivor, Difference)
}




#' Build the matched cost table and write it as LaTeX
#'
#' Calls `calc_table_2()` once per cost category on slices of the global
#' `dt_cost_for_exhibits_cancer_month`, adds each row's share of the total
#' difference, formats the numbers, and writes
#' `Tab_2_matching_<filename>.tex` with `Hmisc::latex()`.
#'
#' @param intensity_wards Values of `profession` counted as "Low Intensity";
#'   every other row counts as "High Intensity".
#' @param filename Suffix of the output file name.
#' @param ... Further arguments passed on to every `calc_table_2()` call.
#' @return The formatted `data.table` with columns `Category`, `Decedent`,
#'   `Survivor`, `Difference` and `sh_total`.
print_table_2_matched <- function(intensity_wards = c("oncology","internal_medicine",
                                                       "geriatry","rehabilitation"),
                                  filename = "oncology_low",
                                  ...) {

  costs <- dt_cost_for_exhibits_cancer_month

  # One table row: slice the cost data, aggregate it, hand it to
  # calc_table_2(). `...` comes first so that the helper's own arguments can
  # only be matched by their exact names.
  table_row <- function(..., label, cost_rows) {
    calc_table_2(name = label,
                 data_filterd = make_slice_cost_month(cost_rows),
                 ...)
  }

  # NOTE(review): "High Intensity" is every row whose profession is not in
  # intensity_wards; it is not restricted to inpatient rows although it is
  # printed under "All Inpatient:" - confirm this is intended.
  full_table <- rbind(
    table_row(..., label = "Total",           cost_rows = costs),
    table_row(..., label = "All Inpatient:",  cost_rows = costs[main_cat %like% "Inpatient"]),
    table_row(..., label = "Planned",         cost_rows = costs[main_cat == "Inpatient_Planned"]),
    table_row(..., label = "Unplanned",       cost_rows = costs[main_cat == "Inpatient_Unplanned"]),
    table_row(..., label = "Low Intensity",   cost_rows = costs[profession %in% intensity_wards]),
    table_row(..., label = "High Intensity",  cost_rows = costs[!profession %in% intensity_wards]),
    table_row(..., label = "Other Services:", cost_rows = costs[!main_cat %like% "Inpatient"]),
    table_row(..., label = "Drugs",           cost_rows = costs[main_cat == "Drugs"]),
    table_row(..., label = "Outpatient",      cost_rows = costs[main_cat == "Outpatient"]),
    table_row(..., label = "Imaging",         cost_rows = costs[main_cat == "Imaging"]),
    table_row(..., label = "Other",           cost_rows = costs[main_cat == "Others"])
  ) %>% data.table()

  # Each row's difference as a percentage of the "Total" row's difference
  # (row 1), formatted with one decimal.
  full_table[, sh_total := format(round((Difference / Difference[1]) * 100, 1), nsmall = 1)]

  # Decedent, Survivor and Difference as comma-separated whole numbers.
  full_table[, c(2,3,4) := lapply(.SD, scales::comma, accuracy = 1), .SDcols = c(2,3,4)]

  # Indent the sub-categories; "~~" is two non-breaking spaces in LaTeX.
  full_table[c(3:6,8:11), Category := paste0("~~", Category)]

  # Column 2 holds the Decedent figures and column 3 the Survivor figures,
  # so the group headers must come in that order (they used to be swapped).
  invisible(Hmisc::latex(
    full_table,
    file = paste0("Tab_2_matching_",filename,".tex"),
    center = 'centering',
    n.cgroup = c(1, 1, 1, 2),
    cgroup   = c("", "Decedent", "Survivor", "Difference"),
    extracolheads = c("","(1)", "(2)", "(3)", "(4)"),

    colheads = c("\\thead{Category}",
                 "\\thead{Adjusted for \\\\ Survival\\\\Duration}",
                 "\\thead{Adjusted for \\\\ Survival\\\\Duration}",
                 "\\thead{Decedent -\\\\ Survivor}",
                 "\\thead{Percent of \\\\ Total Difference}"),
    rowname = NULL,
    col.just = c("l", rep.int("r", 4)),
    extracolsize = "normalsize"
  ))

  return(full_table)
}



